import scrapy
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from scrapy_readbook_Dome.items import ScrapyReadbookDomeItem


class ReadSpider(CrawlSpider):
    """Crawl the paginated dushu.com listing for book category 1188.

    Starting from the first listing page, the spider follows every link
    whose URL matches ``book/1188_<n>.html`` and passes each such page to
    :meth:`parse_item`, which yields one item per book cover found.
    """

    name = "read"
    allowed_domains = ["www.dushu.com"]
    start_urls = ["https://www.dushu.com/book/1188_1.html"]

    rules = (
        Rule(
            # Regular expression selecting the pagination links to follow.
            LinkExtractor(allow=r"book/1188_\d+\.html"),
            callback="parse_item",
            follow=True,
        ),
    )

    def parse_item(self, response):
        """Yield a ``ScrapyReadbookDomeItem`` for each book on a listing page.

        For every cover image under the ``bookslist`` container the item
        carries:

        * ``name``   - the image's ``alt`` attribute,
        * ``src``    - the image's ``data-original`` attribute,
        * ``author`` - the text node at the same position in the page-wide
          list of first-``<p>`` texts, or ``None`` when that list is
          shorter than the list of covers.

        Args:
            response: The Scrapy response for a listing page.

        Yields:
            ScrapyReadbookDomeItem: One item per cover image.
        """
        covers = response.xpath('//div[@class="bookslist"]//a/img')
        # The author query is page-wide, so run it once rather than once
        # per cover.
        authors = response.xpath(
            '//div[@class="bookslist"]//p[1]/text()'
        ).extract()

        if len(authors) != len(covers):
            # Authors are paired with covers purely by position; a count
            # mismatch means some pairings may be shifted or missing.
            self.logger.warning(
                "Found %d covers but %d author entries on %s",
                len(covers),
                len(authors),
                response.url,
            )

        for index, cover in enumerate(covers):
            # Guard the lookup so a short author list does not abort the
            # whole page with IndexError; the book is still emitted.
            author = authors[index] if index < len(authors) else None
            yield ScrapyReadbookDomeItem(
                name=cover.xpath('./@alt').extract_first(),
                src=cover.xpath('./@data-original').extract_first(),
                author=author,
            )
